# This script translates high-scoring (by cosine similarity) opposition articles for Egypt into English.
# It does so using GPT-4o-mini.
# It produces a CSV file containing the translated articles.
# NOTE: This script will not reproduce the paper's results exactly,
# because the language model's output is stochastic.
import pandas as pd
import time
from openai import OpenAI
from tqdm import tqdm

# Initialize OpenAI client with API key.
# Either paste your key below, or leave it blank and set the OPENAI_API_KEY
# environment variable (preferable: it keeps the secret out of the source file).
API_KEY = ""  # Replace with your own API key

# A blank placeholder is passed as None so that the OpenAI client falls back to
# the OPENAI_API_KEY environment variable and raises at start-up when no key is
# available. Passing "" would be accepted here and only fail later, once per
# request, where translate_text() swallows the error and keeps the original text.
client = OpenAI(api_key=API_KEY or None)

# File paths
input_csv = "data/output/cos_sims_robustness/high_cos_sim_articles_150000030k.csv"
output_csv = "data/output/cos_sims_robustness/high_cos_sim_articles_translated_150000030k.csv"

# Read the CSV file
df = pd.read_csv(input_csv)

# Ensure the 'content' column exists before any API calls are made
if 'content' not in df.columns:
    raise ValueError("Column 'content' not found in the dataset.")

# Function to translate text using GPT-4o-mini with retry logic
def translate_text(text, retries=3):
    """Translate Arabic text into English using GPT-4o-mini.

    Args:
        text: The value to translate. Anything that is not a non-blank
            string (NaN, None, empty/whitespace-only strings, numbers, ...)
            is returned unchanged without calling the API.
        retries: Maximum number of API attempts before giving up.

    Returns:
        The message content of the first choice returned by the model, or
        ``text`` itself when there is nothing to translate or every attempt
        raised an exception.
    """
    # Non-string cells (missing values, numbers) have nothing to translate,
    # and calling .strip() on them would raise AttributeError.
    if not isinstance(text, str) or not text.strip():
        return text

    prompt = f"Translate the following Arabic text into English:\n\n{text}"

    for attempt in range(retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are a professional translator."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0
            )
            return response.choices[0].message.content

        except Exception as e:
            # Best-effort: report the failure and try again rather than
            # aborting the whole run over a single article.
            print(f"Translation error on attempt {attempt + 1}: {e}")
            if attempt + 1 < retries:
                time.sleep(2)  # Wait before retrying (pointless after the last attempt)

    return text  # Return original text if translation fails

# Register tqdm's progress_apply on pandas objects, then translate every article
tqdm.pandas()
articles = df["content"]
df["content_translated"] = articles.progress_apply(translate_text)

# Write the original columns plus the translations, without the index column
df.to_csv(output_csv, index=False)

print(f"Translation completed. Saved to: {output_csv}")
